#include "internal_includes/tokens.h"
#include "internal_includes/structs.h"
#include "internal_includes/decode.h"
#include "stdlib.h"
#include "stdio.h"
#include "internal_includes/reflect.h"
#include "internal_includes/debug.h"
#include "internal_includes/hlslcc_malloc.h"

#define FOURCC(a, b, c, d) ((uint32_t)(uint8_t)(a) | ((uint32_t)(uint8_t)(b) << 8) | ((uint32_t)(uint8_t)(c) << 16) | ((uint32_t)(uint8_t)(d) << 24 ))
static enum {FOURCC_DXBC = FOURCC('D', 'X', 'B', 'C')}; //DirectX byte code
static enum {FOURCC_SHDR = FOURCC('S', 'H', 'D', 'R')}; //Shader model 4 code
static enum {FOURCC_SHEX = FOURCC('S', 'H', 'E', 'X')}; //Shader model 5 code
static enum {FOURCC_RDEF = FOURCC('R', 'D', 'E', 'F')}; //Resource definition (e.g. constant buffers)
static enum {FOURCC_ISGN = FOURCC('I', 'S', 'G', 'N')}; //Input signature
static enum {FOURCC_IFCE = FOURCC('I', 'F', 'C', 'E')}; //Interface (for dynamic linking)
static enum {FOURCC_OSGN = FOURCC('O', 'S', 'G', 'N')}; //Output signature

static enum {FOURCC_ISG1 = FOURCC('I', 'S', 'G', '1')}; //Input signature with Stream and MinPrecision
static enum {FOURCC_OSG1 = FOURCC('O', 'S', 'G', '1')}; //Output signature with Stream and MinPrecision
static enum {FOURCC_OSG5 = FOURCC('O', 'S', 'G', '5')}; //Output signature with Stream

typedef struct DXBCContainerHeaderTAG
{
	unsigned fourcc;
	uint32_t unk[4];
	uint32_t one;
	uint32_t totalSize;
	uint32_t chunkCount;
} DXBCContainerHeader;

typedef struct DXBCChunkHeaderTAG
{
	unsigned fourcc;
	unsigned size;
} DXBCChunkHeader;

#ifdef _DEBUG
static uint64_t operandID = 0;
static uint64_t instructionID = 0;
#endif

#if defined(_WIN32)
#define osSprintf(dest, size, src) sprintf_s(dest, size, src)
#else
#define osSprintf(dest, size, src) sprintf(dest, src)
#endif

void DecodeNameToken(const uint32_t* pui32NameToken, Operand* psOperand)
{
	const size_t MAX_BUFFER_SIZE = sizeof(psOperand->pszSpecialName);
	psOperand->eSpecialName = DecodeOperandSpecialName(*pui32NameToken);
	switch(psOperand->eSpecialName)
	{
	case NAME_UNDEFINED:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "undefined");
			break;
		}
	case NAME_POSITION:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "position");
			break;
		}
	case NAME_CLIP_DISTANCE:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "clipDistance");
			break;
		}
	case NAME_CULL_DISTANCE:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "cullDistance");
			break;
		}
	case NAME_RENDER_TARGET_ARRAY_INDEX:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "renderTargetArrayIndex");
			break;
		}
	case NAME_VIEWPORT_ARRAY_INDEX:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "viewportArrayIndex");
			break;
		}
	case NAME_VERTEX_ID:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "vertexID");
			break;
		}
	case NAME_PRIMITIVE_ID:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "primitiveID");
			break;
		}
	case NAME_INSTANCE_ID:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "instanceID");
			break;
		}
	case NAME_IS_FRONT_FACE:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "isFrontFace");
			break;
		}
	case NAME_SAMPLE_INDEX:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "sampleIndex");
			break;
		}
		//For the quadrilateral domain, there are 6 factors (4 sides, 2 inner).
	case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
	case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: 
	case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: 
	case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
	case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR:
	case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR:

		//For the triangular domain, there are 4 factors (3 sides, 1 inner)
	case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
	case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
	case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
	case NAME_FINAL_TRI_INSIDE_TESSFACTOR:

		//For the isoline domain, there are 2 factors (detail and density).
	case NAME_FINAL_LINE_DETAIL_TESSFACTOR:
	case NAME_FINAL_LINE_DENSITY_TESSFACTOR:
		{
			osSprintf(psOperand->pszSpecialName, MAX_BUFFER_SIZE, "tessFactor");
			break;
		}
	default:
		{
			ASSERT(0);
			break;
		}
	}

	return;
}

uint32_t DecodeOperand(const uint32_t *pui32Tokens, Operand* psOperand)
{
	int i;
	uint32_t ui32NumTokens = 1;
	OPERAND_NUM_COMPONENTS eNumComponents;

#ifdef _DEBUG
	psOperand->id = operandID++;
#endif

	//Some defaults
	psOperand->iWriteMaskEnabled = 1;
	psOperand->iGSInput = 0;
	psOperand->aeDataType[0] = SVT_FLOAT;
	psOperand->aeDataType[1] = SVT_FLOAT;
	psOperand->aeDataType[2] = SVT_FLOAT;
	psOperand->aeDataType[3] = SVT_FLOAT;

	psOperand->iExtended = DecodeIsOperandExtended(*pui32Tokens);


	psOperand->eModifier = OPERAND_MODIFIER_NONE;
	psOperand->psSubOperand[0] = 0;
	psOperand->psSubOperand[1] = 0;
	psOperand->psSubOperand[2] = 0;

	/* Check if this instruction is extended.  If it is,
	* we need to print the information first */
	if (psOperand->iExtended)
	{
		/* OperandToken1 is the second token */
		ui32NumTokens++;

		if(DecodeExtendedOperandType(pui32Tokens[1]) == EXTENDED_OPERAND_MODIFIER)
		{
			psOperand->eModifier = DecodeExtendedOperandModifier(pui32Tokens[1]);
			psOperand->eMinPrecision = DecodeOperandMinPrecision(pui32Tokens[1]);
		}

	}

	psOperand->iIndexDims = DecodeOperandIndexDimension(*pui32Tokens);
	psOperand->eType = DecodeOperandType(*pui32Tokens);

	psOperand->ui32RegisterNumber = 0;

	eNumComponents = DecodeOperandNumComponents(*pui32Tokens);

	switch(eNumComponents)
	{
	case OPERAND_1_COMPONENT:
		{
			psOperand->iNumComponents = 1;
			break;
		}
	case OPERAND_4_COMPONENT:
		{
			psOperand->iNumComponents = 4;
			break;
		}
	default:
		{
			psOperand->iNumComponents = 0;
			break;
		}
	}

	if(psOperand->iWriteMaskEnabled &&
		psOperand->iNumComponents == 4)
	{
		psOperand->eSelMode = DecodeOperand4CompSelMode(*pui32Tokens);

		if(psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
		{
			psOperand->ui32CompMask = DecodeOperand4CompMask(*pui32Tokens);
		}
		else
			if(psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
			{
				psOperand->ui32Swizzle = DecodeOperand4CompSwizzle(*pui32Tokens);

				if(psOperand->ui32Swizzle != NO_SWIZZLE)
				{
					psOperand->aui32Swizzle[0] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 0);
					psOperand->aui32Swizzle[1] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 1);
					psOperand->aui32Swizzle[2] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 2);
					psOperand->aui32Swizzle[3] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 3);
				}
				else
				{
					psOperand->aui32Swizzle[0] = OPERAND_4_COMPONENT_X;
					psOperand->aui32Swizzle[1] = OPERAND_4_COMPONENT_Y;
					psOperand->aui32Swizzle[2] = OPERAND_4_COMPONENT_Z;
					psOperand->aui32Swizzle[3] = OPERAND_4_COMPONENT_W;
				}
			}
			else
				if(psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
				{
					psOperand->aui32Swizzle[0] = DecodeOperand4CompSel1(*pui32Tokens);
				}
	}

	//Set externally to this function based on the instruction opcode.
	psOperand->iIntegerImmediate = 0;

	if(psOperand->eType == OPERAND_TYPE_IMMEDIATE32)
	{
		for(i=0; i< psOperand->iNumComponents; ++i)
		{
			psOperand->afImmediates[i] = *((float*)(&pui32Tokens[ui32NumTokens]));
			ui32NumTokens ++;
		}
	}
	else
		if(psOperand->eType == OPERAND_TYPE_IMMEDIATE64)
		{
			for(i=0; i< psOperand->iNumComponents; ++i)
			{
				psOperand->adImmediates[i] = *((double*)(&pui32Tokens[ui32NumTokens]));
				ui32NumTokens +=2;
			}
		}

		for(i=0; i <psOperand->iIndexDims; ++i)
		{
			OPERAND_INDEX_REPRESENTATION eRep = DecodeOperandIndexRepresentation(i ,*pui32Tokens);

			psOperand->eIndexRep[i] = eRep;

			psOperand->aui32ArraySizes[i] = 0;
			psOperand->ui32RegisterNumber = 0;

			switch(eRep)
			{
			case OPERAND_INDEX_IMMEDIATE32:
				{
					psOperand->ui32RegisterNumber = *(pui32Tokens+ui32NumTokens);
					psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber;
					break;
				}
			case OPERAND_INDEX_RELATIVE:
				{
					psOperand->psSubOperand[i] = hlslcc_malloc(sizeof(Operand));
					DecodeOperand(pui32Tokens+ui32NumTokens, psOperand->psSubOperand[i]);

					ui32NumTokens++;
					break;
				}
			case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
				{
					psOperand->ui32RegisterNumber = *(pui32Tokens+ui32NumTokens);
					psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber;

					ui32NumTokens++;

					psOperand->psSubOperand[i] = hlslcc_malloc(sizeof(Operand));
					DecodeOperand(pui32Tokens+ui32NumTokens, psOperand->psSubOperand[i]);

					ui32NumTokens++;
					break;
				}
			default:
				{
					ASSERT(0);
					break;
				}
			}

			ui32NumTokens++;
		}

		psOperand->pszSpecialName[0] ='\0';

		return ui32NumTokens;
}

const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, Declaration* psDecl)
{
	uint32_t ui32TokenLength = DecodeInstructionLength(*pui32Token);
	const uint32_t bExtended = DecodeIsOpcodeExtended(*pui32Token);
	const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32Token);
	uint32_t ui32OperandOffset = 1;

	if(eOpcode < NUM_OPCODES && eOpcode >= 0)
	{
		psShader->aiOpcodeUsed[eOpcode] = 1;
	}

	psDecl->eOpcode = eOpcode;

	psDecl->ui32TexReturnType = SVT_FLOAT;

	if(bExtended)
	{
		ui32OperandOffset = 2;
	}

	switch (eOpcode)
	{
	case OPCODE_DCL_RESOURCE: // DCL* opcodes have
		{
			ResourceBinding* psBinding = 0;
			psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token);
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);

			if (psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE &&
			    GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psShader->sInfo, &psBinding))
			{
				psDecl->ui32TexReturnType = psBinding->ui32ReturnType;
			}
			break;
		}
	case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats.
		{
			psDecl->value.eCBAccessPattern = DecodeConstantBufferAccessPattern(*pui32Token);
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_SAMPLER:
		{
			break;
		}
	case OPCODE_DCL_INDEX_RANGE:
		{
			psDecl->ui32NumOperands = 1;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			psDecl->value.ui32IndexRange = pui32Token[ui32OperandOffset];

			if(psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT)
			{
				uint32_t i;
				const uint32_t indexRange = psDecl->value.ui32IndexRange;
				const uint32_t reg = psDecl->asOperands[0].ui32RegisterNumber;

				psShader->aIndexedInput[reg] = indexRange;
				psShader->aIndexedInputParents[reg] = reg;

				//-1 means don't declare this input because it falls in
				//the range of an already declared array.
				for(i=reg+1; i<reg+indexRange; ++i)
				{
					psShader->aIndexedInput[i] = -1;
					psShader->aIndexedInputParents[i] = reg;
				}
			}

			if(psDecl->asOperands[0].eType == OPERAND_TYPE_OUTPUT)
			{
				psShader->aIndexedOutput[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.ui32IndexRange;
			}
			break;
		}
	case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
		{
			psDecl->value.eOutputPrimitiveTopology = DecodeGSOutputPrimitiveTopology(*pui32Token);
			break;
		}
	case OPCODE_DCL_GS_INPUT_PRIMITIVE:
		{
			psDecl->value.eInputPrimitive = DecodeGSInputPrimitive(*pui32Token);
			break;
		}
	case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
		{
			psDecl->value.ui32MaxOutputVertexCount = pui32Token[1];
			break;
		}
	case OPCODE_DCL_TESS_PARTITIONING:
		{
			psDecl->value.eTessPartitioning = DecodeTessPartitioning(*pui32Token);
			break;
		}
	case OPCODE_DCL_TESS_DOMAIN:
		{
			psDecl->value.eTessDomain = DecodeTessDomain(*pui32Token);
			break;
		}
	case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
		{
			psDecl->value.eTessOutPrim = DecodeTessOutPrim(*pui32Token);
			break;
		}
	case OPCODE_DCL_THREAD_GROUP:
		{
			psDecl->value.aui32WorkGroupSize[0] = pui32Token[1];
			psDecl->value.aui32WorkGroupSize[1] = pui32Token[2];
			psDecl->value.aui32WorkGroupSize[2] = pui32Token[3];
			break;
		}
	case OPCODE_DCL_INPUT:
		{
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_INPUT_SIV:
		{
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			if(psShader->eShaderType == PIXEL_SHADER)
			{
				psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token);

			}
			break;
		}
	case OPCODE_DCL_INPUT_PS:
		{
			psDecl->ui32NumOperands = 1;
			psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token);
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_INPUT_SGV:
	case OPCODE_DCL_INPUT_PS_SGV:
		{
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_INPUT_PS_SIV:
		{
			psDecl->ui32NumOperands = 1;
			psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token);
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_OUTPUT:
		{
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_OUTPUT_SGV:
		{
			break;
		}
	case OPCODE_DCL_OUTPUT_SIV:
		{
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_TEMPS:
		{
			psDecl->value.ui32NumTemps = *(pui32Token+ui32OperandOffset);
			break;
		}
	case OPCODE_DCL_INDEXABLE_TEMP:
		{
			psDecl->sIdxTemp.ui32RegIndex = *(pui32Token+ui32OperandOffset);
			psDecl->sIdxTemp.ui32RegCount = *(pui32Token+ui32OperandOffset+1);
			psDecl->sIdxTemp.ui32RegComponentSize = *(pui32Token+ui32OperandOffset+2);
			break;
		}
	case OPCODE_DCL_GLOBAL_FLAGS:
		{
			psDecl->value.ui32GlobalFlags = DecodeGlobalFlags(*pui32Token);
			break;
		}
	case OPCODE_DCL_INTERFACE:
		{
			uint32_t func = 0, numClassesImplementingThisInterface, arrayLen, interfaceID;
			interfaceID = pui32Token[ui32OperandOffset];
			ui32OperandOffset++;
			psDecl->ui32TableLength = pui32Token[ui32OperandOffset];
			ui32OperandOffset++;

			numClassesImplementingThisInterface = DecodeInterfaceTableLength(*(pui32Token+ui32OperandOffset));
			arrayLen = DecodeInterfaceArrayLength(*(pui32Token+ui32OperandOffset));

			ui32OperandOffset++;

			psDecl->value.interface.ui32InterfaceID = interfaceID;
			psDecl->value.interface.ui32NumFuncTables = numClassesImplementingThisInterface;
			psDecl->value.interface.ui32ArraySize = arrayLen;

			psShader->funcPointer[interfaceID].ui32NumBodiesPerTable = psDecl->ui32TableLength;

			for(;func < numClassesImplementingThisInterface; ++func)
			{
				uint32_t ui32FuncTable = *(pui32Token+ui32OperandOffset);
				psShader->aui32FuncTableToFuncPointer[ui32FuncTable] = interfaceID;

				psShader->funcPointer[interfaceID].aui32FuncTables[func] = ui32FuncTable;
				ui32OperandOffset++;
			}

			break;
		}
	case OPCODE_DCL_FUNCTION_BODY:
		{
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_FUNCTION_TABLE:
		{
			uint32_t ui32Func;
			const uint32_t ui32FuncTableID = pui32Token[ui32OperandOffset++];
			const uint32_t ui32NumFuncsInTable = pui32Token[ui32OperandOffset++];

			for(ui32Func=0; ui32Func<ui32NumFuncsInTable;++ui32Func)
			{
				const uint32_t ui32FuncBodyID = pui32Token[ui32OperandOffset++];

				psShader->aui32FuncBodyToFuncTable[ui32FuncBodyID] = ui32FuncTableID;

				psShader->funcTable[ui32FuncTableID].aui32FuncBodies[ui32Func] = ui32FuncBodyID;

			}

			// OpcodeToken0 is followed by a DWORD that represents the function table
			// identifier and another DWORD (TableLength) that gives the number of
			// functions in the table.
			//
			// This is followed by TableLength DWORDs which are function body indices.
			//

			break;
		}
	case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
		{
			break;
		}
	case OPCODE_HS_DECLS:
		{
			break;
		}
	case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
		{
			psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token);
			break;
		}
	case OPCODE_HS_JOIN_PHASE:
	case OPCODE_HS_FORK_PHASE:
	case OPCODE_HS_CONTROL_POINT_PHASE:
		{
			break;
		}
	case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
		{
			ASSERT(psShader->ui32ForkPhaseCount != 0);//Check for wrapping when we decrement.
			psDecl->value.aui32HullPhaseInstanceInfo[0] = psShader->ui32ForkPhaseCount-1;
			psDecl->value.aui32HullPhaseInstanceInfo[1] = pui32Token[1];
			break;
		}
	case OPCODE_CUSTOMDATA:
		{
			ui32TokenLength = pui32Token[1];
			{
				int iTupleSrc = 0, iTupleDest = 0;
				//const uint32_t ui32ConstCount = pui32Token[1] - 2;
				//const uint32_t ui32TupleCount = (ui32ConstCount / 4);
				CUSTOMDATA_CLASS eClass = DecodeCustomDataClass(pui32Token[0]);

				const uint32_t ui32NumVec4 = (ui32TokenLength - 2) / 4;
				uint32_t uIdx = 0;

				ICBVec4 const *pVec4Array = (void*) (pui32Token + 2);

				//The buffer will contain at least one value, but not more than 4096 scalars/1024 vec4's.
				ASSERT(ui32NumVec4 < MAX_IMMEDIATE_CONST_BUFFER_VEC4_SIZE);

				/* must be a multiple of 4 */
				ASSERT(((ui32TokenLength - 2) % 4) == 0);

				for (uIdx = 0; uIdx < ui32NumVec4; uIdx++)
				{
					psDecl->asImmediateConstBuffer[uIdx] = pVec4Array[uIdx];
				}

				psDecl->ui32NumOperands = ui32NumVec4;
			}
			break;
		}
	case OPCODE_DCL_HS_MAX_TESSFACTOR:
		{
			psDecl->value.fMaxTessFactor = *((float*)&pui32Token[1]);
			break;
		}
	case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
		{
			psDecl->ui32NumOperands = 2;
			psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token);
			psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token);
			psDecl->sUAV.bCounter = 0;
			psDecl->sUAV.ui32BufferSize = 0;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			psDecl->sUAV.Type = DecodeResourceReturnType(0, pui32Token[ui32OperandOffset]);
			break;
		}
	case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
		{
			ResourceBinding* psBinding = NULL;
			ConstantBuffer* psBuffer = NULL;

			psDecl->ui32NumOperands = 1;
			psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token);
			psDecl->sUAV.bCounter = 0;
			psDecl->sUAV.ui32BufferSize = 0;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			//This should be a RTYPE_UAV_RWBYTEADDRESS buffer. It is memory backed by
			//a shader storage buffer whose is unknown at compile time.
			psDecl->sUAV.ui32BufferSize = 0;
			break;
		}
	case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
		{
			ResourceBinding* psBinding = NULL;
			ConstantBuffer* psBuffer = NULL;

			psDecl->ui32NumOperands = 1;
			psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token);
			psDecl->sUAV.bCounter = 0;
			psDecl->sUAV.ui32BufferSize = 0;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);

			GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psShader->sInfo, &psBinding);

			GetConstantBufferFromBindingPoint(RGROUP_UAV, psBinding->ui32BindPoint, &psShader->sInfo, &psBuffer);
			psDecl->sUAV.ui32BufferSize = psBuffer->ui32TotalSizeInBytes;
			switch(psBinding->eType)
			{
			case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER:
			case RTYPE_UAV_APPEND_STRUCTURED:
			case RTYPE_UAV_CONSUME_STRUCTURED:
				psDecl->sUAV.bCounter = 1;
				break;
			default:
				break;
			}
			break;
		}
	case OPCODE_DCL_RESOURCE_STRUCTURED:
		{
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_RESOURCE_RAW:
		{
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
		{
			ResourceBinding* psBinding = NULL;
			ConstantBuffer* psBuffer = NULL;

			psDecl->ui32NumOperands = 1;
			psDecl->sUAV.ui32GloballyCoherentAccess = 0;

			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);

			psDecl->sTGSM.ui32Stride = pui32Token[ui32OperandOffset++];
			psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++];
			break;
		}
	case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
		{
			ResourceBinding* psBinding = NULL;
			ConstantBuffer* psBuffer = NULL;

			psDecl->ui32NumOperands = 1;
			psDecl->sUAV.ui32GloballyCoherentAccess = 0;

			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);

			psDecl->sTGSM.ui32Stride = 4;
			psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++] / 4;
			break;
		}
	case OPCODE_DCL_STREAM:
		{
			psDecl->ui32NumOperands = 1;
			DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]);
			break;
		}
	case OPCODE_DCL_GS_INSTANCE_COUNT:
		{
			psDecl->ui32NumOperands = 0;
			psDecl->value.ui32GSInstanceCount = pui32Token[1];
			break;
		}
	default:
		{
			//Reached end of declarations
			return 0;
		}
	}

	UpdateDeclarationReferences(psShader, psDecl);

	return pui32Token + ui32TokenLength;
}

const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psInst, Shader* psShader)
{
	uint32_t ui32TokenLength = DecodeInstructionLength(*pui32Token);
	const uint32_t bExtended = DecodeIsOpcodeExtended(*pui32Token);
	const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32Token);
	uint32_t ui32OperandOffset = 1;

#ifdef _DEBUG
	psInst->id = instructionID++;
#endif

	psInst->eOpcode = eOpcode;

	psInst->bSaturate = DecodeInstructionSaturate(*pui32Token);

	psInst->bAddressOffset = 0;

	psInst->ui32FirstSrc = 1;

	if(bExtended)
	{
		do {
			const uint32_t ui32ExtOpcodeToken = pui32Token[ui32OperandOffset];
			const EXTENDED_OPCODE_TYPE eExtType = DecodeExtendedOpcodeType(ui32ExtOpcodeToken);

			if(eExtType == EXTENDED_OPCODE_SAMPLE_CONTROLS)
			{
				psInst->bAddressOffset = 1;

				psInst->iUAddrOffset = DecodeImmediateAddressOffset(IMMEDIATE_ADDRESS_OFFSET_U, ui32ExtOpcodeToken);
				psInst->iVAddrOffset = DecodeImmediateAddressOffset(IMMEDIATE_ADDRESS_OFFSET_V, ui32ExtOpcodeToken);
				psInst->iWAddrOffset = DecodeImmediateAddressOffset(IMMEDIATE_ADDRESS_OFFSET_W, ui32ExtOpcodeToken);
			}
			else if(eExtType == EXTENDED_OPCODE_RESOURCE_RETURN_TYPE)
			{
				psInst->xType = DecodeExtendedResourceReturnType(0, ui32ExtOpcodeToken);
				psInst->yType = DecodeExtendedResourceReturnType(1, ui32ExtOpcodeToken);
				psInst->zType = DecodeExtendedResourceReturnType(2, ui32ExtOpcodeToken);
				psInst->wType = DecodeExtendedResourceReturnType(3, ui32ExtOpcodeToken);
			}
			else if(eExtType == EXTENDED_OPCODE_RESOURCE_DIM)
			{
				psInst->eResDim = DecodeExtendedResourceDimension(ui32ExtOpcodeToken);
			}

			ui32OperandOffset++;
		}
		while(DecodeIsOpcodeExtended(pui32Token[ui32OperandOffset-1]));
	}

	if(eOpcode < NUM_OPCODES && eOpcode >= 0)
	{
		psShader->aiOpcodeUsed[eOpcode] = 1;
	}

	switch (eOpcode)
	{
		//no operands
	case OPCODE_CUT:
	case OPCODE_EMIT:
	case OPCODE_EMITTHENCUT:
	case OPCODE_RET:
	case OPCODE_LOOP:
	case OPCODE_ENDLOOP:
	case OPCODE_BREAK:
	case OPCODE_ELSE:
	case OPCODE_ENDIF:
	case OPCODE_CONTINUE:
	case OPCODE_DEFAULT:
	case OPCODE_ENDSWITCH:
	case OPCODE_NOP:
	case OPCODE_HS_CONTROL_POINT_PHASE:
	case OPCODE_HS_FORK_PHASE:
	case OPCODE_HS_JOIN_PHASE:
		{
			psInst->ui32NumOperands = 0;
			psInst->ui32FirstSrc = 0;
			break;
		}
	case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
		{
			psInst->ui32NumOperands = 0;
			psInst->ui32FirstSrc = 0;
			break;
		}
	case OPCODE_SYNC:
		{
			psInst->ui32NumOperands = 0;
			psInst->ui32FirstSrc = 0;
			psInst->ui32SyncFlags = DecodeSyncFlags(*pui32Token);
			break;
		}

		//1 operand
	case OPCODE_EMIT_STREAM:
	case OPCODE_CUT_STREAM:
	case OPCODE_EMITTHENCUT_STREAM:
	case OPCODE_CASE:
	case OPCODE_SWITCH:
	case OPCODE_LABEL:
		{
			psInst->ui32NumOperands = 1;
			psInst->ui32FirstSrc = 0;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);

			//			if(eOpcode == OPCODE_CASE)
			//			{
			//				psInst->asOperands[0].iIntegerImmediate = 1;
			//			}
			break;
		}

	case OPCODE_INTERFACE_CALL:
		{
			psInst->ui32NumOperands = 1;
			psInst->ui32FirstSrc = 0;
			psInst->ui32FuncIndexWithinInterface = pui32Token[ui32OperandOffset];
			ui32OperandOffset++;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);

			break;
		}

		/* Floating point instruction decodes */

		//Instructions with two operands go here
	case OPCODE_MOV:
		{
			psInst->ui32NumOperands = 2;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);

			//Mov with an integer dest. If src is an immediate then it must be encoded as an integer.
			if(psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_SINT_16 ||
				psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_UINT_16)
			{
				psInst->asOperands[1].iIntegerImmediate = 1;
			}
			break;
		}
	case OPCODE_LOG:
	case OPCODE_RSQ:
	case OPCODE_EXP:
	case OPCODE_SQRT:
	case OPCODE_ROUND_PI:
	case OPCODE_ROUND_NI:
	case OPCODE_ROUND_Z:
	case OPCODE_ROUND_NE:
	case OPCODE_FRC:
	case OPCODE_FTOU:
	case OPCODE_FTOI:
	case OPCODE_UTOF:
	case OPCODE_ITOF:
	case OPCODE_INEG:
	case OPCODE_IMM_ATOMIC_ALLOC:
	case OPCODE_IMM_ATOMIC_CONSUME:
	case OPCODE_DMOV:
	case OPCODE_DTOF:
	case OPCODE_FTOD:
	case OPCODE_DRCP:
	case OPCODE_COUNTBITS:
	case OPCODE_FIRSTBIT_HI:
	case OPCODE_FIRSTBIT_LO:
	case OPCODE_FIRSTBIT_SHI:
	case OPCODE_BFREV:
	case OPCODE_F32TOF16:
	case OPCODE_F16TOF32:
	case OPCODE_RCP:
	case OPCODE_DERIV_RTX:
	case OPCODE_DERIV_RTY:
	case OPCODE_DERIV_RTX_COARSE:
	case OPCODE_DERIV_RTX_FINE:
	case OPCODE_DERIV_RTY_COARSE:
	case OPCODE_DERIV_RTY_FINE:
	case OPCODE_NOT:
		{
			psInst->ui32NumOperands = 2;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			break;
		}

		//Instructions with three operands go here
	case OPCODE_SINCOS:
		{
			psInst->ui32FirstSrc = 2;
			//Intentional fall-through
		}
	case OPCODE_IMIN:
	case OPCODE_UMIN:
	case OPCODE_MIN:
	case OPCODE_IMAX:
	case OPCODE_UMAX:
	case OPCODE_MAX:
	case OPCODE_MUL:
	case OPCODE_DIV:
	case OPCODE_ADD:
	case OPCODE_DP2:
	case OPCODE_DP3:
	case OPCODE_DP4:
	case OPCODE_NE:
	case OPCODE_OR:
	case OPCODE_XOR:
	case OPCODE_LT:
	case OPCODE_IEQ:
	case OPCODE_IADD:
	case OPCODE_AND:
	case OPCODE_GE:
	case OPCODE_IGE:
	case OPCODE_EQ:
	case OPCODE_ISHL:
	case OPCODE_ISHR:
	case OPCODE_LD:
	case OPCODE_ILT:
	case OPCODE_INE:
	case OPCODE_ATOMIC_AND:
	case OPCODE_ATOMIC_IADD:
	case OPCODE_ATOMIC_OR:
	case OPCODE_ATOMIC_XOR:
	case OPCODE_ATOMIC_IMAX:
	case OPCODE_ATOMIC_IMIN:
	case OPCODE_DADD:
	case OPCODE_DMAX:
	case OPCODE_DMIN:
	case OPCODE_DMUL:
	case OPCODE_DEQ:
	case OPCODE_DGE:
	case OPCODE_DLT:
	case OPCODE_DNE:
	case OPCODE_DDIV:
		{
			psInst->ui32NumOperands = 3;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			break;
		}
	case OPCODE_UGE:
	case OPCODE_ULT:
	case OPCODE_USHR:
	case OPCODE_ATOMIC_UMAX:
	case OPCODE_ATOMIC_UMIN:
		{
			psInst->ui32NumOperands = 3;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			break;
		}
		//Instructions with four operands go here
	case OPCODE_MAD:
	case OPCODE_MOVC:
	case OPCODE_IMAD:
	case OPCODE_UDIV:
	case OPCODE_LOD:
	case OPCODE_SAMPLE:
	case OPCODE_GATHER4:
	case OPCODE_LD_MS:
	case OPCODE_UBFE:
	case OPCODE_IBFE:
	case OPCODE_ATOMIC_CMP_STORE:
	case OPCODE_IMM_ATOMIC_IADD:
	case OPCODE_IMM_ATOMIC_AND:
	case OPCODE_IMM_ATOMIC_OR:
	case OPCODE_IMM_ATOMIC_XOR:
	case OPCODE_IMM_ATOMIC_EXCH:
	case OPCODE_IMM_ATOMIC_IMAX:
	case OPCODE_IMM_ATOMIC_IMIN:
	case OPCODE_DMOVC:
	case OPCODE_DFMA:
	case OPCODE_IMUL:
		{
			psInst->ui32NumOperands = 4;

			if(eOpcode == OPCODE_IMUL)
			{
				psInst->ui32FirstSrc = 2;
			}

			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]);

			break;
		}
	case OPCODE_UADDC:
	case OPCODE_USUBB:
	case OPCODE_IMM_ATOMIC_UMAX:
	case OPCODE_IMM_ATOMIC_UMIN:
		{
			psInst->ui32NumOperands = 4;

			if(eOpcode == OPCODE_IMUL)
			{
				psInst->ui32FirstSrc = 2;
			}

			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]);

			break;
		}
	case OPCODE_GATHER4_PO:
	case OPCODE_SAMPLE_L:
	case OPCODE_BFI:
	case OPCODE_SWAPC:
	case OPCODE_IMM_ATOMIC_CMP_EXCH:
		{
			psInst->ui32NumOperands = 5;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[4]);

			break;
		}
	case OPCODE_GATHER4_C:
	case OPCODE_SAMPLE_C:
	case OPCODE_SAMPLE_C_LZ:
	case OPCODE_SAMPLE_B:
		{
			psInst->ui32NumOperands = 5;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[4]);
			break;
		}
	case OPCODE_GATHER4_PO_C:
	case OPCODE_SAMPLE_D:
		{
			psInst->ui32NumOperands = 6;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[4]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[5]);
			break;
		}
	case OPCODE_IF:
	case OPCODE_BREAKC:
	case OPCODE_CONTINUEC:
	case OPCODE_RETC:
	case OPCODE_DISCARD:
		{
			psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token);
			psInst->ui32NumOperands = 1;
			psInst->ui32FirstSrc = 0;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			break;
		}
	case OPCODE_CALLC:
		{
			psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token);
			psInst->ui32NumOperands = 2;
			psInst->ui32FirstSrc = 0;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			break;
		}
	case OPCODE_CUSTOMDATA:
		{
			psInst->ui32NumOperands = 0;
			ui32TokenLength = pui32Token[1];
			break;
		}
	case OPCODE_EVAL_CENTROID:
		{
			psInst->ui32NumOperands = 2;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			break;
		}
	case OPCODE_EVAL_SAMPLE_INDEX:
	case OPCODE_EVAL_SNAPPED:
	case OPCODE_STORE_UAV_TYPED:
	case OPCODE_LD_UAV_TYPED:
	case OPCODE_LD_RAW:
	case OPCODE_STORE_RAW:
		{
			psInst->ui32NumOperands = 3;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			break;
		}
	case OPCODE_STORE_STRUCTURED:
	case OPCODE_LD_STRUCTURED:
		{
			psInst->ui32NumOperands = 4;
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[3]);
			break;
		}
	case OPCODE_RESINFO:
		{
			psInst->ui32NumOperands = 3;

			psInst->eResInfoReturnType = DecodeResInfoReturnType(pui32Token[0]);

			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[0]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[1]);
			ui32OperandOffset += DecodeOperand(pui32Token+ui32OperandOffset, &psInst->asOperands[2]);
			break;
		}
	case OPCODE_MSAD:
	default:
		{
			ASSERT(0);
			break;
		}
	}

	UpdateInstructionReferences(psShader, psInst);

	return pui32Token + ui32TokenLength;
}

void BindTextureToSampler(Shader* psShader, uint32_t ui32TextureRegister, uint32_t ui32SamplerRegister, uint32_t bCompare)
{
	uint32_t ui32Sampler, ui32TextureUnit, bLoad;
	ASSERT(ui32TextureRegister < (1 << 10));
	ASSERT(ui32SamplerRegister < (1 << 10));

	if (psShader->sInfo.ui32NumSamplers >= MAX_RESOURCE_BINDINGS)
	{
		ASSERT(0);
		return;
	}

	ui32TextureUnit = ui32TextureRegister;
	for (ui32Sampler = 0; ui32Sampler < psShader->sInfo.ui32NumSamplers; ++ui32Sampler)
	{
		if (psShader->sInfo.asSamplers[ui32Sampler].sMask.ui10TextureBindPoint == ui32TextureRegister)
		{
			if (psShader->sInfo.asSamplers[ui32Sampler].sMask.ui10SamplerBindPoint == ui32SamplerRegister)
				break;
			ui32TextureUnit = MAX_RESOURCE_BINDINGS; // Texture is used by two or more samplers - assign to an available texture unit later
		}
	}

	// MAX_RESOURCE_BINDINGS means no sampler object (used for texture load)
	bLoad = ui32SamplerRegister == MAX_RESOURCE_BINDINGS;

	if (bCompare)
		psShader->sInfo.asSamplers[ui32Sampler].sMask.bCompareSample  = 1;
	else if (!bLoad)
		psShader->sInfo.asSamplers[ui32Sampler].sMask.bNormalSample   = 1;
	else
	{
		psShader->sInfo.asSamplers[ui32Sampler].sMask.bNormalSample   = 0;
		psShader->sInfo.asSamplers[ui32Sampler].sMask.bCompareSample  = 0;
	}

	if (ui32Sampler == psShader->sInfo.ui32NumSamplers)
	{
		psShader->sInfo.asSamplers[ui32Sampler].sMask.ui10TextureBindPoint = ui32TextureRegister;
		psShader->sInfo.asSamplers[ui32Sampler].sMask.ui10SamplerBindPoint = ui32SamplerRegister;
		psShader->sInfo.asSamplers[ui32Sampler].sMask.ui10TextureUnit = ui32TextureUnit;
		++psShader->sInfo.ui32NumSamplers;
	}
}

void RegisterUniformBuffer(Shader* psShader, ResourceGroup eGroup, uint32_t ui32BindPoint)
{
	uint32_t ui32UniformBuffer = psShader->sInfo.ui32NumUniformBuffers;
	psShader->sInfo.asUniformBuffers[ui32UniformBuffer].ui32BindPoint = ui32BindPoint;
	psShader->sInfo.asUniformBuffers[ui32UniformBuffer].eGroup = eGroup;
	++psShader->sInfo.ui32NumUniformBuffers;
}

void RegisterStorageBuffer(Shader* psShader, ResourceGroup eGroup, uint32_t ui32BindPoint)
{
	uint32_t ui32StorageBuffer = psShader->sInfo.ui32NumStorageBuffers;
	psShader->sInfo.asStorageBuffers[ui32StorageBuffer].ui32BindPoint = ui32BindPoint;
	psShader->sInfo.asStorageBuffers[ui32StorageBuffer].eGroup = eGroup;
	++psShader->sInfo.ui32NumStorageBuffers;
}

void RegisterImage(Shader* psShader, ResourceGroup eGroup, uint32_t ui32BindPoint)
{
	uint32_t ui32Image = psShader->sInfo.ui32NumImages;
	psShader->sInfo.asImages[ui32Image].ui32BindPoint = ui32BindPoint;
	psShader->sInfo.asImages[ui32Image].eGroup = eGroup;
	++psShader->sInfo.ui32NumImages;
}

void AssignRemainingSamplers(Shader* psShader)
{
	uint32_t ui32Sampler;
	uint32_t aui32TextureUnitsUsed[(MAX_RESOURCE_BINDINGS + 31) / 32];
	uint32_t ui32MinAvailUnit;

	memset((void*)aui32TextureUnitsUsed, 0, sizeof(aui32TextureUnitsUsed));
	for (ui32Sampler = 0; ui32Sampler < psShader->sInfo.ui32NumSamplers; ++ui32Sampler)
	{
		uint32_t ui32Unit = psShader->sInfo.asSamplers[ui32Sampler].sMask.ui10TextureUnit;
		if (ui32Unit < MAX_RESOURCE_BINDINGS)
			aui32TextureUnitsUsed[ui32Unit / 32] |= 1 << (ui32Unit % 32);
	}

	ui32MinAvailUnit = 0;
	for (ui32Sampler = 0; ui32Sampler < psShader->sInfo.ui32NumSamplers; ++ui32Sampler)
	{
		uint32_t ui32Unit = psShader->sInfo.asSamplers[ui32Sampler].sMask.ui10TextureUnit;
		if (ui32Unit == MAX_RESOURCE_BINDINGS)
		{
			uint32_t ui32Mask, ui32AvailUnit;
			uint32_t ui32WordIndex = ui32MinAvailUnit / 32;
			uint32_t ui32BitIndex = ui32MinAvailUnit % 32;

			while (ui32WordIndex < sizeof(aui32TextureUnitsUsed))
			{
				if (aui32TextureUnitsUsed[ui32WordIndex] != ~0L)
					break;
				++ui32WordIndex;
				ui32BitIndex = 0;
			}
			if (ui32WordIndex == sizeof(aui32TextureUnitsUsed))
			{
				ASSERT(0); // Not enough resource bindings
				break;
			}

			ui32Mask = aui32TextureUnitsUsed[ui32WordIndex];
			while (ui32BitIndex < 32)
			{
				if ((ui32Mask & (1 << ui32BitIndex)) == 0)
					break;
				++ui32BitIndex;
			}
			if (ui32BitIndex == 32)
			{
				ASSERT(0);
				break;
			}

			ui32AvailUnit = 32 * ui32WordIndex + ui32BitIndex;
			aui32TextureUnitsUsed[ui32WordIndex] |= (1 << ui32BitIndex);

			psShader->sInfo.asSamplers[ui32Sampler].sMask.ui10TextureUnit = ui32AvailUnit;
			ui32MinAvailUnit = ui32AvailUnit + 1;

			ASSERT(psShader->sInfo.asSamplers[ui32Sampler].sMask.ui10TextureUnit < MAX_RESOURCE_BINDINGS);
		}
	}
}

void UpdateDeclarationReferences(Shader* psShader, Declaration* psDecl)
{
	switch (psDecl->eOpcode)
	{
		case OPCODE_DCL_CONSTANT_BUFFER:
			RegisterUniformBuffer(psShader, RGROUP_CBUFFER, psDecl->asOperands[0].aui32ArraySizes[0]);
			break;
		case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
			RegisterImage(psShader, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber);
			break;
		case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
			RegisterStorageBuffer(psShader, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber);
			break;
		case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
			RegisterStorageBuffer(psShader, RGROUP_UAV, psDecl->asOperands[0].aui32ArraySizes[0]);
			break;
		case OPCODE_DCL_RESOURCE_RAW:
			RegisterStorageBuffer(psShader, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber);
			break;
		case OPCODE_DCL_RESOURCE_STRUCTURED:
			RegisterStorageBuffer(psShader, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber);
			break;
	}
}

void UpdateInstructionReferences(Shader* psShader, Instruction* psInst)
{
	uint32_t ui32Operand;
	const uint32_t ui32NumOperands = psInst->ui32NumOperands;
	for(ui32Operand = 0; ui32Operand < ui32NumOperands; ++ui32Operand)
	{
		Operand* psOperand = &psInst->asOperands[ui32Operand];
		if(psOperand->eType == OPERAND_TYPE_INPUT || 
			psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT)
		{
			if(psOperand->iIndexDims == INDEX_2D)
			{
				if(psOperand->aui32ArraySizes[1] != 0)//gl_in[].gl_Position
				{
					psShader->abInputReferencedByInstruction[psOperand->ui32RegisterNumber] = 1;
				}
			}
			else
			{
				psShader->abInputReferencedByInstruction[psOperand->ui32RegisterNumber] = 1;
			}
		}
	}

	switch (psInst->eOpcode)
	{
	case OPCODE_SWAPC:
		psShader->bUseTempCopy = 1;
		break;
	case OPCODE_SAMPLE:
	case OPCODE_SAMPLE_L:
	case OPCODE_SAMPLE_D:
	case OPCODE_SAMPLE_B:
	case OPCODE_GATHER4:
		BindTextureToSampler(psShader, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 0);
		break;
	case OPCODE_SAMPLE_C_LZ:
	case OPCODE_SAMPLE_C:
	case OPCODE_GATHER4_C:
		BindTextureToSampler(psShader, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 1);
		break;
	case OPCODE_GATHER4_PO:
		BindTextureToSampler(psShader, psInst->asOperands[3].ui32RegisterNumber, psInst->asOperands[4].ui32RegisterNumber, 0);
		break;
	case OPCODE_GATHER4_PO_C:
		BindTextureToSampler(psShader, psInst->asOperands[3].ui32RegisterNumber, psInst->asOperands[4].ui32RegisterNumber, 1);
		break;
	case OPCODE_LD:
	case OPCODE_LD_MS:
		// MAX_RESOURCE_BINDINGS means no sampler object
		BindTextureToSampler(psShader, psInst->asOperands[2].ui32RegisterNumber, MAX_RESOURCE_BINDINGS, 0);
		break;
	}
}

const uint32_t* DecodeHullShaderJoinPhase(const uint32_t* pui32Tokens, Shader* psShader)
{
	const uint32_t* pui32CurrentToken = pui32Tokens;
	const uint32_t ui32ShaderLength = psShader->ui32ShaderLength;

	Instruction* psInst;

	//Declarations
	Declaration* psDecl;
	psDecl = hlslcc_malloc(sizeof(Declaration) * ui32ShaderLength);
	psShader->psHSJoinPhaseDecl = psDecl;
	psShader->ui32HSJoinDeclCount = 0;

	while(1) //Keep going until we reach the first non-declaration token, or the end of the shader.
	{
		const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, psDecl);

		if(pui32Result)
		{
			pui32CurrentToken = pui32Result;
			psShader->ui32HSJoinDeclCount++;
			psDecl++;

			if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength))
			{
				break;
			}
		}
		else
		{
			break;
		}
	}


	//Instructions
	psInst = hlslcc_malloc(sizeof(Instruction) * ui32ShaderLength);
	psShader->psHSJoinPhaseInstr = psInst;
	psShader->ui32HSJoinInstrCount = 0;

	while (pui32CurrentToken < (psShader->pui32FirstToken + ui32ShaderLength))
	{
		const uint32_t* nextInstr = DecodeInstruction(pui32CurrentToken, psInst, psShader);

#ifdef _DEBUG
		if(nextInstr == pui32CurrentToken)
		{
			ASSERT(0);
			break;
		}
#endif

		pui32CurrentToken = nextInstr;
		psShader->ui32HSJoinInstrCount++;

		psInst++;
	}

	return pui32CurrentToken;
}

const uint32_t* DecodeHullShaderForkPhase(const uint32_t* pui32Tokens, Shader* psShader)
{
	const uint32_t* pui32CurrentToken = pui32Tokens;
	const uint32_t ui32ShaderLength = psShader->ui32ShaderLength;
	const uint32_t ui32ForkPhaseIndex = psShader->ui32ForkPhaseCount;

	Instruction* psInst;

	//Declarations
	Declaration* psDecl;
	psDecl = hlslcc_malloc(sizeof(Declaration) * ui32ShaderLength);

	ASSERT(ui32ForkPhaseIndex < MAX_FORK_PHASES);

	psShader->ui32ForkPhaseCount++;

	psShader->apsHSForkPhaseDecl[ui32ForkPhaseIndex] = psDecl;
	psShader->aui32HSForkDeclCount[ui32ForkPhaseIndex] = 0;

	while(1) //Keep going until we reach the first non-declaration token, or the end of the shader.
	{
		const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, psDecl);

		if(pui32Result)
		{
			pui32CurrentToken = pui32Result;
			psShader->aui32HSForkDeclCount[ui32ForkPhaseIndex]++;
			psDecl++;

			if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength))
			{
				break;
			}
		}
		else
		{
			break;
		}
	}


	//Instructions
	psInst = hlslcc_malloc(sizeof(Instruction) * ui32ShaderLength);
	psShader->apsHSForkPhaseInstr[ui32ForkPhaseIndex] = psInst;
	psShader->aui32HSForkInstrCount[ui32ForkPhaseIndex] = 0;

	while (pui32CurrentToken < (psShader->pui32FirstToken + ui32ShaderLength))
	{
		const uint32_t* nextInstr = DecodeInstruction(pui32CurrentToken, psInst, psShader);

#ifdef _DEBUG
		if(nextInstr == pui32CurrentToken)
		{
			ASSERT(0);
			break;
		}
#endif

		pui32CurrentToken = nextInstr;

		if(psInst->eOpcode == OPCODE_HS_FORK_PHASE)
		{
			pui32CurrentToken = DecodeHullShaderForkPhase(pui32CurrentToken, psShader);
			return pui32CurrentToken;
		}

		psShader->aui32HSForkInstrCount[ui32ForkPhaseIndex]++;
		psInst++;
	}

	return pui32CurrentToken;
}

const uint32_t* DecodeHullShaderControlPointPhase(const uint32_t* pui32Tokens, Shader* psShader)
{
	const uint32_t* pui32CurrentToken = pui32Tokens;
	const uint32_t ui32ShaderLength = psShader->ui32ShaderLength;

	Instruction* psInst;

	//TODO one block of memory for instructions and declarions to reduce memory usage and number of allocs.
	//hlscc_malloc max(sizeof(declaration), sizeof(instruction) * shader length; or sizeof(DeclInst) - unifying both structs.

	//Declarations
	Declaration* psDecl;
	psDecl = hlslcc_malloc(sizeof(Declaration) * ui32ShaderLength);
	psShader->psHSControlPointPhaseDecl = psDecl;
	psShader->ui32HSControlPointDeclCount = 0;

	while(1) //Keep going until we reach the first non-declaration token, or the end of the shader.
	{
		const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, psDecl);

		if(pui32Result)
		{
			pui32CurrentToken = pui32Result;
			psShader->ui32HSControlPointDeclCount++;
			psDecl++;

			if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength))
			{
				break;
			}
		}
		else
		{
			break;
		}
	}


	//Instructions
	psInst = hlslcc_malloc(sizeof(Instruction) * ui32ShaderLength);
	psShader->psHSControlPointPhaseInstr = psInst;
	psShader->ui32HSControlPointInstrCount = 0;

	while (pui32CurrentToken < (psShader->pui32FirstToken + ui32ShaderLength))
	{
		const uint32_t* nextInstr = DecodeInstruction(pui32CurrentToken, psInst, psShader);

#ifdef _DEBUG
		if(nextInstr == pui32CurrentToken)
		{
			ASSERT(0);
			break;
		}
#endif

		pui32CurrentToken = nextInstr;

		if(psInst->eOpcode == OPCODE_HS_FORK_PHASE)
		{
			pui32CurrentToken = DecodeHullShaderForkPhase(pui32CurrentToken, psShader);
			return pui32CurrentToken;
		}
		if(psInst->eOpcode == OPCODE_HS_JOIN_PHASE)
		{
			pui32CurrentToken = DecodeHullShaderJoinPhase(pui32CurrentToken, psShader);
			return pui32CurrentToken;
		}
		psInst++;
		psShader->ui32HSControlPointInstrCount++;
	}

	return pui32CurrentToken;
}

const uint32_t* DecodeHullShader(const uint32_t* pui32Tokens, Shader* psShader)
{
	const uint32_t* pui32CurrentToken = pui32Tokens;
	const uint32_t ui32ShaderLength = psShader->ui32ShaderLength;
	Declaration* psDecl;
	psDecl = hlslcc_malloc(sizeof(Declaration) * ui32ShaderLength);
	psShader->psHSDecl = psDecl;
	psShader->ui32HSDeclCount = 0;

	while(1) //Keep going until we reach the first non-declaration token, or the end of the shader.
	{
		const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, psDecl);

		if(pui32Result)
		{
			pui32CurrentToken = pui32Result;

			if(psDecl->eOpcode == OPCODE_HS_CONTROL_POINT_PHASE)
			{
				pui32CurrentToken = DecodeHullShaderControlPointPhase(pui32CurrentToken, psShader);
				return pui32CurrentToken;
			}
			if(psDecl->eOpcode == OPCODE_HS_FORK_PHASE)
			{
				pui32CurrentToken = DecodeHullShaderForkPhase(pui32CurrentToken, psShader);
				return pui32CurrentToken;
			}
			if(psDecl->eOpcode == OPCODE_HS_JOIN_PHASE)
			{
				pui32CurrentToken = DecodeHullShaderJoinPhase(pui32CurrentToken, psShader);
				return pui32CurrentToken;
			}

			psDecl++;
			psShader->ui32HSDeclCount++;

			if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength))
			{
				break;
			}
		}
		else
		{
			break;
		}
	}

	return pui32CurrentToken;
}

void Decode(const uint32_t* pui32Tokens, Shader* psShader)
{
	const uint32_t* pui32CurrentToken = pui32Tokens;
	const uint32_t ui32ShaderLength = pui32Tokens[1];
	Instruction* psInst;
	Declaration* psDecl;

	psShader->ui32MajorVersion = DecodeProgramMajorVersion(*pui32CurrentToken);
	psShader->ui32MinorVersion = DecodeProgramMinorVersion(*pui32CurrentToken);
	psShader->eShaderType = DecodeShaderType(*pui32CurrentToken);

	pui32CurrentToken++;//Move to shader length
	psShader->ui32ShaderLength = ui32ShaderLength;
	pui32CurrentToken++;//Move to after shader length (usually a declaration)

	psShader->pui32FirstToken = pui32Tokens;

#ifdef _DEBUG
	operandID = 0;
	instructionID = 0;
#endif

	if(psShader->eShaderType == HULL_SHADER)
	{
		pui32CurrentToken = DecodeHullShader(pui32CurrentToken, psShader);
		return;
	}

	//Using ui32ShaderLength as the instruction count
	//will allocate more than enough memory. Avoids having to
	//traverse the entire shader just to get the real instruction count.
	psInst = hlslcc_malloc(sizeof(Instruction) * ui32ShaderLength);
	psShader->psInst = psInst;
	psShader->ui32InstCount = 0;

	psDecl = hlslcc_malloc(sizeof(Declaration) * ui32ShaderLength);
	psShader->psDecl = psDecl;
	psShader->ui32DeclCount = 0;

	while(1) //Keep going until we reach the first non-declaration token, or the end of the shader.
	{
		const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, psDecl);

		if(pui32Result)
		{
			pui32CurrentToken = pui32Result;
			psShader->ui32DeclCount++;
			psDecl++;

			if(pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength))
			{
				break;
			}
		}
		else
		{
			break;
		}
	}

	while (pui32CurrentToken < (psShader->pui32FirstToken + ui32ShaderLength))
	{
		const uint32_t* nextInstr = DecodeInstruction(pui32CurrentToken, psInst, psShader);

#ifdef _DEBUG
		if(nextInstr == pui32CurrentToken)
		{
			ASSERT(0);
			break;
		}
#endif

		pui32CurrentToken = nextInstr;
		psShader->ui32InstCount++;
		psInst++;
	}

	AssignRemainingSamplers(psShader);
}

Shader* DecodeDXBC(uint32_t* data)
{
	Shader* psShader;
	DXBCContainerHeader* header = (DXBCContainerHeader*)data;
	uint32_t i;
	uint32_t chunkCount;
	uint32_t* chunkOffsets;
	ReflectionChunks refChunks;
	uint32_t* shaderChunk = 0;

	if(header->fourcc != FOURCC_DXBC)
	{
		//Could be SM1/2/3. If the shader type token
		//looks valid then we continue
		uint32_t type = DecodeShaderTypeDX9(data[0]);

		if(type != INVALID_SHADER)
		{
			return DecodeDX9BC(data);
		}
		return 0;
	}

	refChunks.pui32Inputs = NULL;
	refChunks.pui32Interfaces = NULL;
	refChunks.pui32Outputs = NULL;
	refChunks.pui32Resources = NULL;
	refChunks.pui32Inputs11 = NULL;
	refChunks.pui32Outputs11 = NULL;
	refChunks.pui32OutputsWithStreams = NULL;

	chunkOffsets = (uint32_t*)(header + 1);

	chunkCount = header->chunkCount;

	for(i = 0; i < chunkCount; ++i)
	{
		uint32_t offset = chunkOffsets[i];

		DXBCChunkHeader* chunk = (DXBCChunkHeader*)((char*)data + offset);

		switch(chunk->fourcc)
		{
		case FOURCC_ISGN:
			{
				refChunks.pui32Inputs = (uint32_t*)(chunk + 1);
				break;
			}
		case FOURCC_ISG1:
			{
				refChunks.pui32Inputs11 = (uint32_t*)(chunk + 1);
				break;
			}
		case FOURCC_RDEF:
			{
				refChunks.pui32Resources = (uint32_t*)(chunk + 1);
				break;
			}
		case FOURCC_IFCE:
			{
				refChunks.pui32Interfaces = (uint32_t*)(chunk + 1);
				break;
			}
		case FOURCC_OSGN:
			{
				refChunks.pui32Outputs = (uint32_t*)(chunk + 1);
				break;
			}
		case FOURCC_OSG1:
			{
				refChunks.pui32Outputs11 = (uint32_t*)(chunk + 1);
				break;
			}
		case FOURCC_OSG5:
			{
				refChunks.pui32OutputsWithStreams = (uint32_t*)(chunk + 1);
				break;
			}
		case FOURCC_SHDR:
		case FOURCC_SHEX:
			{
				shaderChunk = (uint32_t*)(chunk + 1);
				break;
			}
		default:
			{
				break;
			}
		}
	}

	if(shaderChunk)
	{
		uint32_t ui32MajorVersion;
		uint32_t ui32MinorVersion;

		psShader = hlslcc_calloc(1, sizeof(Shader));

		ui32MajorVersion = DecodeProgramMajorVersion(*shaderChunk);
		ui32MinorVersion = DecodeProgramMinorVersion(*shaderChunk);

		LoadShaderInfo(ui32MajorVersion,
			ui32MinorVersion,
			&refChunks,
			&psShader->sInfo);

		Decode(shaderChunk, psShader);

		return psShader;
	}

	return 0;
}

